Take Home Exercise 1

Published

November 27, 2023

Modified

November 30, 2023

Getting Started

pacman::p_load(tmap,sf,tidyverse,knitr,h3jsr,dplyr, mapview)

Preparing the Flow Data

Importing the OD data

Firstly, we will import the Passenger Volume by Origin Destination Bus Stops data set downloaded from LTA DataMall by using read_csv() of readr package.

odbus <- read_csv("data/aspatial/origin_destination_bus_202308.csv")

Check odbus tibble data frame that values in OROGIN_PT_CODE and DESTINATION_PT_CODE are in numeric data type.

glimpse(odbus)
Rows: 5,709,512
Columns: 7
$ YEAR_MONTH          <chr> "2023-08", "2023-08", "2023-08", "2023-08", "2023-…
$ DAY_TYPE            <chr> "WEEKDAY", "WEEKENDS/HOLIDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR       <dbl> 16, 16, 14, 14, 17, 17, 17, 17, 7, 17, 14, 10, 10,…
$ PT_TYPE             <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE      <chr> "04168", "04168", "80119", "80119", "44069", "4406…
$ DESTINATION_PT_CODE <chr> "10051", "10051", "90079", "90079", "17229", "1722…
$ TOTAL_TRIPS         <dbl> 7, 2, 3, 10, 5, 4, 3, 22, 3, 3, 7, 1, 3, 1, 3, 1, …

Origin & Destination Bus Stop Code

odbus$ORIGIN_PT_CODE <-
as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <-
as.factor(odbus$DESTINATION_PT_CODE)

Extracting the Study Data

Filter out data that belong to trips that occur on:

  • “Weekday” and “6-9am” (wdmp)

    wdmp <-  odbus %>%
      filter(DAY_TYPE == "WEEKDAY") %>%
      filter(TIME_PER_HOUR >= 6 &
               TIME_PER_HOUR <= 9) %>%
      group_by(ORIGIN_PT_CODE) %>%
      summarise(TRIPS = sum(TOTAL_TRIPS))
  • “Weekday” and “5-8pm” (wdap)

    wdap <-  odbus %>%
      filter(DAY_TYPE == "WEEKDAY") %>%
      filter(TIME_PER_HOUR >= 17 &
               TIME_PER_HOUR <= 20) %>%
      group_by(ORIGIN_PT_CODE) %>%
      summarise(TRIPS = sum(TOTAL_TRIPS))
  • “Weekends/Holiday” and “11am-2pm” (hmp)

    hmp <- odbus %>%
      filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
      filter(TIME_PER_HOUR >= 11 &
               TIME_PER_HOUR <= 14) %>%
      group_by(ORIGIN_PT_CODE) %>%
      summarise(TRIPS = sum(TOTAL_TRIPS))
  • “Weekends/Holiday” and “4pm-7pm” (hep)

    hep <- odbus %>%
      filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
      filter(TIME_PER_HOUR >= 16 &
               TIME_PER_HOUR <= 19) %>%
      group_by(ORIGIN_PT_CODE) %>%
      summarise(TRIPS = sum(TOTAL_TRIPS))

Check resulting data tables

kable(head(wdmp)) 
ORIGIN_PT_CODE TRIPS
01012 1973
01013 952
01019 1789
01029 2561
01039 2938
01059 1651
kable(head(wdap)) 
ORIGIN_PT_CODE TRIPS
01012 8448
01013 7328
01019 3608
01029 9317
01039 12937
01059 2133
kable(head(hmp)) 
ORIGIN_PT_CODE TRIPS
01012 2273
01013 1697
01019 1511
01029 3272
01039 5424
01059 1062
kable(head(hep))
ORIGIN_PT_CODE TRIPS
01012 3208
01013 2796
01019 1623
01029 4244
01039 7403
01059 1190

Output saved in rds format for future use

write_rds(wdmp, "data/rds/wdmp.rds") 
write_rds(wdap, "data/rds/wdap.rds") 
write_rds(hmp, "data/rds/hmp.rds") 
write_rds(hep, "data/rds/hep.rds")

Import the rds file into R environment

wdmp <- read_rds("data/rds/wdmp.rds") 
wdap <- read_rds("data/rds/wdap.rds") 
hmp <- read_rds("data/rds/hmp.rds") 
hep <- read_rds("data/rds/hmp.rds")

Working with Geospatial Data

Two geospatial data (shapefile) will be used for this exercise:

  • BusStop: Provides location of bus stop as at Q4 2022

  • MPSZ-2019: This data provides the sub-zone boundary of URA Master Plan 2019

Importing geospatial data

busstop <- st_read(dsn = "Data/geospatial",
                   layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
mpsz <- st_read(dsn = "data/geospatial",
                layer = "MPSZ-2019") %>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source 
  `/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84

Check structure of busstop and MPSZ  sf tibble data frame

glimpse(busstop)
Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC   <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry   <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…
glimpse(mpsz)
Rows: 332
Columns: 7
$ SUBZONE_N  <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C  <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N   <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C   <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry   <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…

Geospatial Data Wrangling

Combining Busstop & mpsz

busstop_mpsz <- st_intersection(busstop, mpsz) %>%
  select(BUS_STOP_N, SUBZONE_C) %>%
  st_drop_geometry()

Save output into rds format

write_rds(busstop_mpsz, "data/rds/busstop_mpsz.csv")

Setting up the Hexagon Grid

Drawing the Hexagon Grid

Draw hexagon grid over the mpsz map

area_honeycomb_grid = st_make_grid(mpsz, c(250, 250), what = "polygons", square = FALSE)

Convert the hexagon grid to sf (simple features) object and add a new column grid_id (sequential identifier) to it

honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
  # add grid ID
  mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))

Determine which bus stops is contained within which hexagon using st_within.

busstop_honeycomb <- st_intersection(honeycomb_grid_sf,busstop) %>%
  select(BUS_STOP_N, grid_id) %>%
  st_drop_geometry()

Save output into rds format

write_rds(busstop_honeycomb, "data/rds/busstop_honeycomb.csv")

Check for duplicate records

duplicate <- busstop_honeycomb %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()

Retain unique records only

busstop_honeycomb <- unique(busstop_honeycomb)

Only include hexagon grid IDs that have bus stop numbers

busstop_honeycomb <- busstop_honeycomb %>%
  filter(!is.na(grid_id) & grid_id > 0)

Assign Each Bus Stop to a Grid ID

For all time periods, assign bus stops to a hexagon grid id. All NULL and 0 values are removed.

wdmp_gridid <- left_join(busstop_honeycomb, wdmp,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

wdmp_gridid <- wdmp_gridid %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


wdap_gridid <- left_join(busstop_honeycomb, wdap,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

wdap_gridid <- wdap_gridid %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


hmp_gridid <- left_join(busstop_honeycomb, hmp,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

hmp_gridid <- hmp_gridid %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


hep_gridid <- left_join(busstop_honeycomb, hep,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

hep_gridid <- hep_gridid %>%
  filter(!is.na(TRIPS) & TRIPS > 0)

Choropleth Visualisation

Weekday Morning Peak 6am-9am

Sum up the trips per hexagon

total_trips_by_grid <- wdmp_gridid %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

<insert description of spatial patterns in 200 words>

Weekday Afternoon Peak 5pm-8pm

Sum up the trips per hexagon

total_trips_by_grid <- wdap_gridid %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

<insert description of spatial patterns in 200 words>

Weekend/Holiday Morning Peak 11am-2pm

Sum up the trips per hexagon

total_trips_by_grid <- hmp_gridid %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Greens",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

<insert description of spatial patterns in 200 words>

Collate hexagon data for Weekend/Holiday Evening Peak 4pm-7pm

Sum up the trips per hexagon

total_trips_by_grid <- hep_gridid %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Greens",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

<insert description of spatial patterns in 200 words>

Local Indicators of Spatial Association (LISA) Analysis

  • Compute LISA of the passengers trips generate by origin at hexagon level.

  • Display the LISA maps of the passengers trips generate by origin at hexagon level. The maps should only display the significant (i.e. p-value < 0.05)

  • With reference to the analysis results, draw statistical conclusions (not more than 200 words per visual).